----------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDriv
> e_1_10-21-2022\Replication_files\Log_file\1_ftsedata_creation.log
  log type:  text
 opened on:   3 Aug 2023, 02:07:24

. /*=======================================*/
. /* FTSE Russel Green Revenue data */
. /*=======================================*/
. // Prepare Green Revenue variables
. 
. 
. // 1) import FTSE bulk data (cross-sectional)
. clear all 

. import excel "Bulk4July2018.xls", sheet("Sheet") cellrange(A2:P16269) firstrow
(16 vars, 16,267 obs)

. 
. // Generate Dummy-variables for type of GR-estimate
. gen SpecEstim = 0 

. replace SpecEstim = 1 if EstimateType=="Specific Estimate"
(384 real changes made)

.  
. gen QuantEstim = 0

. replace QuantEstim = 1 if EstimateType=="Quantitative Estimate"
(1,728 real changes made)

.  
. gen DisclEstim = 0

. replace DisclEstim = 1 if EstimateType=="Disclosed"
(803 real changes made)

.  
. gen EstimInfoNA = 0 

. replace EstimInfoNA = 1 if EstimateType==""
(13,352 real changes made)

. 
. //expand the FTSE-bulk dataset so that have 9 observations per firm, to prepare data f
> or the panel structure of the Subsegment data.
. expand 9
(130,136 observations created)

. sort SEDOL

.  
. //Panel variable: SEDOL_num (strongly balanced)
. //Time variable: Year, 2009 to 2017
. bysort SEDOL: gen Year=2008 + _n

. encode SEDOL, gen(SEDOL_num)

. xtset SEDOL_num Year

Panel variable: SEDOL_num (strongly balanced)
 Time variable: Year, 2009 to 2017
         Delta: 1 unit

. 
. save "${raw}Replication_BulkFTSE.dta", replace
file
    Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-
    > 21-2022\Replication_filesReplication_BulkFTSE.dta saved

. 
. // 2) Prepare sub-segment data which contains the detailed sub-segment Green Revenue d
> ata // 
. // the sub-segment green revenue data is divided into three files (File_A, File_B, Fil
> e_C)
. clear all

. import excel "File_A.xlsx", sheet("Blatt1") firstrow clear
(20 vars, 59,998 obs)

. save "${raw}File_A.dta", replace
file
    Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-
    > 21-2022\Replication_filesFile_A.dta saved

. 
. clear all

. import excel "File_B.xlsx", sheet("Blatt1") firstrow
(20 vars, 59,998 obs)

. save "${raw}File_B.dta", replace
file
    Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-
    > 21-2022\Replication_filesFile_B.dta saved

. 
. clear all

. import excel "File_C.xlsx", sheet("Blatt1") firstrow clear
(20 vars, 68,368 obs)

. save "${raw}File_C.dta", replace
file
    Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-
    > 21-2022\Replication_filesFile_C.dta saved

. 
. * append all datasets
. clear all

. use "${raw}File_A.dta"

. append using "${raw}File_B"
(variable SubSegment was str87, now str91 to accommodate using data's values)
(variable Description was str888, now str1175 to accommodate using data's values)
(variable Remarks was str724, now str781 to accommodate using data's values)

. append using "${raw}File_C"

.  
. generate Year = substr(Title, 3, .) 
(1 missing value generated)

. order Year, after(Title)

.  
. destring Year, generate(YearNum) force 
Year: all characters numeric; YearNum generated as int
(1 missing value generated)

. drop Year

. rename YearNum Year

. drop if missing(Year) 
(1 observation deleted)

. 
. sort SEDOL Year

. 
. save "${raw}Replication_New_FTSE_Data_appended.dta", replace
file
    Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-
    > 21-2022\Replication_filesReplication_New_FTSE_Data_appended.dta saved

. 
. // 3) Merge FTSE bulk to FTSE subsegment data 
. * In the detailed subsegment data we have multiple observations per company, 
. * both due to multiple subsegments as well as due to multiple years. 
. 
. use "${raw}Replication_BulkFTSE.dta", clear

. mmerge SEDOL Year using "${raw}Replication_New_FTSE_Data_appended", type(1:n) unmatche
> d(both) missing(nomatch) 

-------------------------------------------------------------------------------
merge specs          |
       matching type | 1:n
  mv's on match vars | nomatch
  unmatched obs from | both
---------------------+---------------------------------------------------------
  master        file | Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_datas
> et\OneDrive_1_10-21-2022\Replication_filesReplication_BulkFTSE.dta
                 obs | 146403
                vars |     22
          match vars | SEDOL Year  (key)
  -------------------+---------------------------------------------------------
  using         file | Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_datas
> et\OneDrive_1_10-21-2022\Replication_filesReplication_New_FTSE_Data_appended.dta
                 obs | 188363
                vars |     21
          match vars | SEDOL Year  (not a key)
---------------------+---------------------------------------------------------
variables SEDOL Year __000000 do not uniquely identify observations in
    E:\TEMP\Kruse_T\ST_3048_000002.tmp
result          file | Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_datas
> et\OneDrive_1_10-21-2022\Replication_filesReplication_BulkFTSE.dta
                 obs | 310945
                vars |     43  (including _merge)
         ------------+---------------------------------------------------------
              _merge | 122582  obs only in master data                (code==1)
                     |  16953  obs only in using data                 (code==2)
                     | 171410  obs both in master and using data      (code==3)
-------------------------------------------------------------------------------

. 
. // rename variables // 
. rename R SubSegRevShare

. rename Revenue SubSegRevLCU

. rename Segment_Revenue Segment_RevenueLCU_unscaled

. 
. * Manually replace  subsegment revenue shares for observations with corrected values o
> n the online FTSE platform*
. replace SubSegRevShare=0.6705 if (Company=="Cia Paranaense de Energia Copel Pref B" & 
> Year==2010 & SubSegment=="GeT_Hydro")
(1 real change made)

. replace SubSegRevShare=0.9820 if (Company=="Menicon" & Year==2016 & Segment=="Contact 
> Lens" & SubSegment=="Contact Lens")
(1 real change made)

. 
. * compute firm-level revenue 
. egen Tag_SEDOL_Segment_Year = tag(SEDOL Segment Year)

. bysort SEDOL Year: egen Company_Rev_LCU_unscaled = total(Segment_RevenueLCU_unscaled) 
> if Tag_SEDOL_Segment_Year==1
(216,689 missing values generated)

. egen long SEDOLNum = group(SEDOL)

. bysort SEDOLNum Year: mipolate Company_Rev_LCU_unscaled SEDOLNum, generate(Company_Rev
> _LCU_unscaledMipl) groupwise
(122582 missing values generated)

. 
. * Create Segment_Revenue_Share
. gen Segment_Revenue_Share = Segment_RevenueLCU_unscaled/Company_Rev_LCU_unscaledMipl
(122,599 missing values generated)

. 
. * Generate dummy variable to indicate if an observation is green or non-green.
. * If the GR_SubSector is empty-string (""), it is a non-green subsegment
. generate GreenSubSectDum =0

. replace GreenSubSectDum = 1 if GR_SubSector !="" 
(100,297 real changes made)

. 
. * Generate dummy indicating if all subsegments within a particular segment are green, 
> i.e. there is no subsegment non-green, the min of GreenSubSectDum==1.
. egen AllSegGreenDum = min(GreenSubSectDum==1), by(SEDOL Year Segment)

. 
. * Generate dummy indicating if at least one Subsegment within a segment is green, mean
> ing that not all subsegments are allowed to be non-green. 
. egen Atleast1GreenDum = max(GreenSubSectDum==1), by(SEDOL Segment Year)

. 
. // 5) Constructe MinGR variable (3 possible cases)
. 
. * Case 1: All subsegments in a segment are green.
. bysort SEDOL Segment Year: gen MinGreenShare1 = Segment_Revenue_Share if AllSegGreenDu
> m==1
(298,072 missing values generated)

. replace MinGreenShare1 = 0 if MinGreenShare1==.
(298,072 real changes made)

. 
. ** Green Revenue Share is coded as 0 non-green revenue.
. gen SubSegNonGRShare = SubSegRevShare if GreenSubSectDum==0
(222,879 missing values generated)

. egen NonGRShareSegment = total(SubSegNonGRShare), by(SEDOL Segment Year) 

. replace NonGRShareSegment = 0 if NonGRShareSegment==.
(0 real changes made)

. 
. * Case 2: Not all subsegments are green. 
. egen MinNonGRSharewithinSegment = min(SubSegNonGRShare), by(SEDOL Segment Year)
(135,455 missing values generated)

. bysort SEDOL Segment Year: gen MinGreenShare2 = (Segment_Revenue_Share - NonGRShareSeg
> ment) if (AllSegGreenDum==0 & MinNonGRSharewithinSegment!=0 & Atleast1GreenDum==1)
(292,763 missing values generated)

. replace MinGreenShare2=0 if MinGreenShare2==.
(292,763 real changes made)

. 
. * Case 3: At least one green subsegment exists
. egen MinSegmentRevenueShare = min(SubSegRevShare), by(SEDOL Segment Year)
(122,582 missing values generated)

. replace MinSegmentRevenueShare = 0 if MinSegmentRevenueShare==.
(122,582 real changes made)

. bysort SEDOL Segment Year: gen MinGreenShare3 = SubSegRevShare if GreenSubSect==1 & Al
> lSegGreen==0 & MinSegmentRevenueShare==0 & Atleast1Green==1 & MinGreenShare2==0
(234,381 missing values generated)

. replace MinGreenShare3=0 if MinGreenShare3==.
(234,381 real changes made)

. egen MinGreenShare3Segment = total(MinGreenShare3), by(SEDOL Segment Year)

. 
. * Sum GRShare1 & GRShare2 & GRShare3Segment for each observation:
. gen MinGreenShareSum = MinGreenShare1 + MinGreenShare2 + MinGreenShare3Segment 

. 
. * Tag MinGreenShareSum by SEDOL Segment Year
. egen MinGRShareTotal_tag = total(MinGreenShareSum) if Tag_SEDOL_Segment_Year==1, by(SE
> DOL Year)
(216,689 missing values generated)

. bysort SEDOL Segment Year: mipolate MinGRShareTotal_tag SEDOLNum, gen(MinGRShareTotal)
>  groupwise
(122582 missing values generated)

. replace MinGRShareTotal = 0 if MinGRShareTotal==.
(122,582 real changes made)

. 
. * Correct for rounding errors in the raw data where the sum of the green revenue share
>  is above 1, and for green revenue shares less than zero due to rounding errors in the
>  raw data 
. gen MinGRShareTotalNonNeg = MinGRShareTotal 

. replace MinGRShareTotalNonNeg=1 if MinGRShareTotalNonNeg>1 & MinGRShareTotalNonNeg!=. 
>  
(27 real changes made)

. replace MinGRShareTotalNonNeg=0 if MinGRShareTotalNonNeg<0 
(205 real changes made)

. gen MinNonGRShareTotal = 1- MinGRShareTotalNonNeg

. 
. * Generate dummy if a firm-year observation has a strictly positive green revenue 
. gen MinGR_yes = 0

. replace MinGR_yes = 1 if (MinGRShareTotalNonNeg>0 & MinGRShareTotalNonNeg<=1)
(64,376 real changes made)

. 
. egen long isincode = group(ISIN)
(20,051 missing values generated)

. 
. // Merge SIC (2- and 3-digit) industry codes downloaded from Worldscope
. mmerge ISIN Year using "ControlVars_Orbis_WS_MASTER", type(n:1)  missing(nomatch) // M
> erge SIC (2- and 3-digit) industry codes downloaded from Worldscope

-------------------------------------------------------------------------------
merge specs          |
       matching type | n:1
  mv's on match vars | nomatch
  unmatched obs from | both
---------------------+---------------------------------------------------------
  master        file | Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_datas
> et\OneDrive_1_10-21-2022\Replication_filesReplication_BulkFTSE.dta
                 obs | 310945
                vars |     64
          match vars | ISIN Year  (not a key)
  -------------------+---------------------------------------------------------
  using         file | ControlVars_Orbis_WS_MASTER.dta
                 obs | 159158
                vars |    176
          match vars | ISIN Year  (key)
  -------------------+---------------------------------------------------------
         common vars | Name
---------------------+---------------------------------------------------------
variables ISIN Year __000000 do not uniquely identify observations in the master data
result          file | Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_datas
> et\OneDrive_1_10-21-2022\Replication_filesReplication_BulkFTSE.dta
                 obs | 327147
                vars |    239  (including _merge)
         ------------+---------------------------------------------------------
              _merge |  20051  obs matchvar==missing in master data  (code==-1)
                     |    977  obs only in master data                (code==1)
                     |  16202  obs only in using data                 (code==2)
                     | 289917  obs both in master and using data      (code==3)
-------------------------------------------------------------------------------

. 
. gen SIC_2digit = substr(USSICprimarycodes, 1,2)
(56,946 missing values generated)

. gen SIC_3digit = substr(USSICprimarycodes, 1,3)
(56,946 missing values generated)

. 
. destring SIC_2digit, gen(SIC_2digitNum)
SIC_2digit: all characters numeric; SIC_2digitNum generated as byte
(56946 missing values generated)

. destring SIC_3digit, gen(SIC_3digitNum)
SIC_3digit: all characters numeric; SIC_3digitNum generated as int
(56946 missing values generated)

. 
. // generate 2-digit SIC1 // 
. tostring SIC1, generate(SIC1_str)
SIC1_str generated as str4

. gen SIC1_2dgts = substr(SIC1_str,1,2)

. destring SIC1_2dgts, gen(SIC1_2dgtsNum)
SIC1_2dgts: all characters numeric; SIC1_2dgtsNum generated as byte
(24362 missing values generated)

. 
. // generate 3-digit SIC1 //
. gen SIC1_3dgts = substr(SIC1_str,1,3)

. destring SIC1_3dgts, gen(SIC1_3dgtsNum)
SIC1_3dgts: all characters numeric; SIC1_3dgtsNum generated as int
(24362 missing values generated)

. bysort ISIN Year: gen firm_n= _n

. 
. 
. /*========================================*/
. /* Create samples of top green firms */
. /*========================================*/
. 
. // Portfolios of firms across green revenue percentiles
. xtile XT10_US_MinGRShareTotalNonNeg = MinGRShareTotalNonNeg if (Year==2013 & MinGRShar
> eTotalNonNeg>0 & MinGRShareTotalNonNeg!=. & CountryName=="United States" & firm_n==1),
>  nquantiles(10)

. 
. // MAIN SAMPLE
. export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw
> }US_MinGR_top8Xtile.xlsx" if (XT10_US_MinGRShareTotalNonNeg==8 | XT10_US_MinGRShareTot
> alNonNeg==9 | XT10_US_MinGRShareTotalNonNeg==10) & Year==2013 & CountryName=="United S
> tates" & firm_n==1, firstrow(variables) replace
file Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-2
> 1-2022\Replication_filesUS_MinGR_top8Xtile.xlsx saved

. 
. // ONLY GREEN REVENUE
. export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw
> }US_MinGR_eq1.xlsx" if MinGRShareTotalNonNeg==1 & Year==2013 & CountryName=="United St
> ates" & firm_n==1, firstrow(variables) replace
file Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-2
> 1-2022\Replication_filesUS_MinGR_eq1.xlsx saved

. 
. // TOP 40% GREEN REVENUE
. export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw
> }US_MinGR_top40pc.xlsx" if (XT10_US_MinGRShareTotalNonNeg==7 | XT10_US_MinGRShareTotal
> NonNeg==8 | XT10_US_MinGRShareTotalNonNeg==9 | XT10_US_MinGRShareTotalNonNeg==10) & Ye
> ar==2013 & CountryName=="United States" & firm_n==1, firstrow(variables) replace
file Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-2
> 1-2022\Replication_filesUS_MinGR_top40pc.xlsx saved

. 
. // MEDIAN GREN FIRMS                       
. export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw
> }US_MinGR_top5Xtile.xlsx" if (XT10_US_MinGRShareTotalNonNeg==5) & Year==2013 & Country
> Name=="United States" & firm_n==1, firstrow(variables) replace
file Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-2
> 1-2022\Replication_filesUS_MinGR_top5Xtile.xlsx saved

. 
. // FIRMS SOME GREEN REVENUE BETWEEN 2009 AND 2013
. * Generate dummy indicator, taking value of 1 if between 2009-13, the MinGR is at leas
> t once strictly positive
. bysort isincode: gen MinGR_yes0913 = MinGR_yes if Year>=2009 & Year<=2013 & firm_n==1
(247,292 missing values generated)

. bysort isincode: egen MinGR_yes0913full = max(MinGR_yes0913)

. export excel isincode MinGR_yes0913full MinGRShareTotalNonNeg CountryName using "${raw
> }US_MinGR_yes0913.xlsx" if CountryName=="United States" & MinGR_yes0913full==1 & Year=
> =2009 & firm_n==1, firstrow(variables) replace
file Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-2
> 1-2022\Replication_filesUS_MinGR_yes0913.xlsx saved

. 
. log close
      name:  <unnamed>
       log:  Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDriv
> e_1_10-21-2022\Replication_files\Log_file\1_ftsedata_creation.log
  log type:  text
 closed on:   3 Aug 2023, 02:08:49
----------------------------------------------------------------------------------------
